
#### HOUSEKEEPING ####

# Remove every (non-hidden) object from the current environment so the script
# starts from a clean state.
# NOTE(review): if this file is ever source()d from another script, this also
# wipes that script's objects — confirm that is intended.
rm(list = ls())

library(tidyverse)
library(data.table)

# NOTE(review): presumably defines `dat_dir` and `input_dir`, which are used
# below but never assigned in this file — confirm.
source("00_paths.R")

## Masked parameters (to protect data provider's anonymity)
# Both are deliberately left as NA here. While they are NA, the
# `date == last_pre_date` and `month == last_pre_mon` filters further down keep
# no rows, so real values must be filled in before the script is run.
last_pre_date <- NA
last_pre_mon <- NA

#### DATA ####

## LOAD SUBSCRIBER-DATE DATA
# Read the subscriber-by-date file from the data directory.
sd_df <- fread(file = paste0(dat_dir, "subdate_final.csv"))

## KEEP RELEVANT VARS
# Retain identifiers, the date, the usage (GB) columns, and the two
# subscriber attributes; everything else in the file is dropped.
sd_df <- dplyr::select(
  sd_df,
  all_of(c(
    "mkt", "customer_key", "date",
    "tot_gb", "gb_video", "gb_browsing", "gb_gaming", "gb_sharing",
    "gb_other", "gb_netflix", "gb_youtube", "gb_linear",
    "vid_flag", "svc_tier"
  ))
)

## AGGREGATE TO SUBSCRIBER-MONTH
# Collapse the daily rows to the subscriber-month level:
#   * vid_flag / svc_tier take the value from the latest date in the month;
#   * tot_gb and every gb_* column become the within-month mean of the daily
#     values, ignoring missing days.
# NOTE(review): month(date) keeps only the month number, so the same calendar
# month from different years would be pooled together — confirm the data
# spans less than one year.
sm_df <- sd_df %>%
  mutate(month = month(date)) %>%
  # arrange() ignores grouping, so sorting before group_by() is equivalent and
  # makes it explicit that last() below picks the most recent day.
  arrange(date) %>%
  group_by(customer_key, month) %>%
  mutate(
    vid_flag = last(vid_flag),
    svc_tier = last(svc_tier),
    # Use TRUE, not T: T is an ordinary variable that can be reassigned.
    across(c(tot_gb, starts_with("gb_")), ~ mean(.x, na.rm = TRUE))
  ) %>%
  ungroup() %>%
  dplyr::select(
    mkt, customer_key, month, vid_flag, svc_tier, tot_gb, starts_with("gb_")
  ) %>%
  # Every retained column except mkt is now constant within a subscriber-month,
  # so distinct() leaves one row per mkt / subscriber / month combination.
  distinct()

fwrite(x = sm_df, file = paste0(input_dir, "submonth_all.csv"))

# Control subscriber list: every subscriber outside the "Treated" market who
# has a row on the last pre-period date, together with their vid_flag and
# svc_tier on that date.
control_all_df <- sd_df %>%
  filter(date == last_pre_date, mkt != "Treated") %>%
  arrange(date) %>%
  group_by(customer_key) %>%
  mutate(
    vid_flag = last(vid_flag),
    svc_tier = last(svc_tier)
  ) %>%
  ungroup() %>%
  dplyr::select(customer_key, vid_flag, svc_tier) %>%
  # Keep one row per subscriber, matching how the treated list is built.
  # Without this, a subscriber with more than one row on the reference date
  # would appear repeatedly in the list file and would multiply rows in the
  # joins below.
  distinct()

# Restrict the daily and monthly panels to the control subscribers. The join
# key is spelled out so the match does not depend on whichever column names
# the two tables happen to share.
control_keys <- control_all_df %>% dplyr::select(customer_key)
sd_control_all_df <- sd_df %>% inner_join(control_keys, by = "customer_key")
sm_control_all_df <- sm_df %>% inner_join(control_keys, by = "customer_key")

fwrite(x = sd_control_all_df, file = paste0(input_dir, "subdate_control_all.csv"))
fwrite(x = control_all_df, file = paste0(input_dir, "subdate_control_list_all.csv"))
fwrite(x = sm_control_all_df, file = paste0(input_dir, "submonth_control_all.csv"))

# Treated subscriber list (daily basis): subscribers in the "Treated" market
# with a row on the last pre-period date, one row each, carrying the vid_flag
# and svc_tier observed on that date.
treat_df <- sd_df %>%
  filter(date == last_pre_date, mkt == "Treated") %>%
  arrange(date) %>%
  group_by(customer_key) %>%
  mutate(
    vid_flag = last(vid_flag),
    svc_tier = last(svc_tier)
  ) %>%
  ungroup() %>%
  distinct(customer_key, vid_flag, svc_tier)

# Daily panel restricted to the treated subscribers (joined on the only
# shared column, customer_key).
sd_treat_df <- inner_join(sd_df, dplyr::select(treat_df, customer_key))

fwrite(x = sd_treat_df, file = paste0(input_dir, "subdate_treat.csv"))
fwrite(x = treat_df, file = paste0(input_dir, "subdate_treat_list.csv"))

# Treated subscriber list (monthly basis): rows of the subscriber-month table
# for the "Treated" market in the last pre-period month. This overwrites the
# daily-based treat_df built earlier in the script.
treat_df <- dplyr::select(
  filter(sm_df, month == last_pre_mon, mkt == "Treated"),
  customer_key, vid_flag, svc_tier
)

# Monthly panel restricted to those treated subscribers (joined on the only
# shared column, customer_key).
sm_treat_df <- inner_join(sm_df, dplyr::select(treat_df, customer_key))

fwrite(x = sm_treat_df, file = paste0(input_dir, "submonth_treat.csv"))
fwrite(x = treat_df, file = paste0(input_dir, "submonth_treat_list.csv"))
